#### 21-02-25
#install.packages(c("glmmTMB", "lme4"))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(performance)
library(summarytools)
##
## Attaching package: 'summarytools'
##
## The following object is masked from 'package:tibble':
##
## view
library(ggeffects)
library(marginaleffects)
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.22.10). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
##
## The following object is masked from 'package:lme4':
##
## ngrps
##
## The following object is masked from 'package:stats':
##
## ar
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(viridis)
## Loading required package: viridisLite
library(broom)
library(knitr)
library(dplyr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(gt)
library(modelsummary)
library(sjPlot)
##
## Attaching package: 'sjPlot'
##
## The following object is masked from 'package:ggplot2':
##
## set_theme
library(glmmTMB)
## Warning in check_dep_version(dep_pkg = "TMB"): package version mismatch:
## glmmTMB was built with TMB package version 1.9.17
## Current TMB package version is 1.9.18
## Please re-install glmmTMB from source or restore original 'TMB' package (see '?reinstalling' for more information)
##
## Attaching package: 'glmmTMB'
##
## The following object is masked from 'package:brms':
##
## lognormal
library(COMPoissonReg)
## Loading required package: numDeriv
library(Rcpp)
library(numDeriv)
complete_2023_2024 <- read.csv("input/2023_2024_all_moth_counts.csv")
#clean_complete - full summer complete moth counts available for 2023 and 2024
# Data Cleaning -----------------------------------------------------------
##To explore the distribution of your variables and count data like clean_complete
# quick visualizations
dfSummary(complete_2023_2024)
## Data Frame Summary
## complete_2023_2024
## Dimensions: 255 x 20
## Duplicates: 0
##
## ------------------------------------------------------------------------------------------------------------------------
## No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
## ---- ------------------- ----------------------------- --------------------- --------------------- ---------- ----------
## 1 patch_name 1. Oka 44 (17.3%) III 255 0
## [character] 2. Orford 39 (15.3%) III (100.0%) (0.0%)
## 3. Rigaud 30 (11.8%) II
## 4. Mont_Saint_Hilaire 29 (11.4%) II
## 5. Montebello 22 ( 8.6%) I
## 6. Notre_Dame_de_Bonsecours 18 ( 7.1%) I
## 7. Mont_Royal 13 ( 5.1%) I
## 8. Mont_Saint_Bruno 12 ( 4.7%)
## 9. Papineauville 12 ( 4.7%)
## 10. Brownsburg 6 ( 2.4%)
## [ 5 others ] 30 (11.8%) II
##
## 2 Years Min : 2023 2023 : 102 (40.0%) IIIIIIII 255 0
## [integer] Mean : 2023.6 2024 : 153 (60.0%) IIIIIIIIIIII (100.0%) (0.0%)
## Max : 2024
##
## 3 Year Min : 0 0 : 102 (40.0%) IIIIIIII 255 0
## [integer] Mean : 0.6 1 : 153 (60.0%) IIIIIIIIIIII (100.0%) (0.0%)
## Max : 1
##
## 4 stand_type 1. MOM 10 ( 3.9%) 255 0
## [character] 2. Oak 73 (28.6%) IIIII (100.0%) (0.0%)
## 3. Oak/Other 33 (12.9%) II
## 4. Oak/Pine 41 (16.1%) III
## 5. Other 30 (11.8%) II
## 6. Pine 39 (15.3%) III
## 7. Pine/Oak 29 (11.4%) II
##
## 5 landscape_type 1. agricultural 139 (54.5%) IIIIIIIIII 255 0
## [character] 2. forest· 85 (33.3%) IIIIII (100.0%) (0.0%)
## 3. urban 31 (12.2%) II
##
## 6 stand_category 1. C 35 (17.9%) III 196 59
## [character] 2. I 29 (14.8%) II (76.9%) (23.1%)
## 3. E 24 (12.2%) II
## 4. H 18 ( 9.2%) I
## 5. F 17 ( 8.7%) I
## 6. L 15 ( 7.7%) I
## 7. A 10 ( 5.1%) I
## 8. D 10 ( 5.1%) I
## 9. K 10 ( 5.1%) I
## 10. MOM 10 ( 5.1%) I
## [ 4 others ] 18 ( 9.2%) I
##
## 7 Percent_Oak Mean (sd) : 0.3 (0.2) 0.00 : 48 (18.8%) III 255 0
## [numeric] min < med < max: 0.10 : 22 ( 8.6%) I (100.0%) (0.0%)
## 0 < 0.3 < 0.8 0.20 : 10 ( 3.9%)
## IQR (CV) : 0.4 (0.7) 0.30 : 58 (22.7%) IIII
## 0.40 : 50 (19.6%) III
## 0.50 : 18 ( 7.1%) I
## 0.60 : 18 ( 7.1%) I
## 0.70 : 19 ( 7.5%) I
## 0.80 : 12 ( 4.7%)
##
## 8 Percent_Pine Mean (sd) : 0.2 (0.2) 0.00 : 117 (45.9%) IIIIIIIII 255 0
## [numeric] min < med < max: 0.10 : 29 (11.4%) II (100.0%) (0.0%)
## 0 < 0.1 < 0.8 0.20 : 27 (10.6%) II
## IQR (CV) : 0.4 (1.2) 0.30 : 12 ( 4.7%)
## 0.40 : 21 ( 8.2%) I
## 0.50 : 19 ( 7.5%) I
## 0.60 : 9 ( 3.5%)
## 0.70 : 15 ( 5.9%) I
## 0.80 : 6 ( 2.4%)
##
## 9 Percent_Maple Mean (sd) : 0.3 (0.2) 10 distinct values II 246 9
## [numeric] min < med < max: III (96.5%) (3.5%)
## 0 < 0.2 < 0.9 IIII
## IQR (CV) : 0.3 (0.6) IIII
## IIII
##
## 10 Forest_Type 1. oak 65 (25.5%) IIIII 255 0
## [character] 2. oak_maple 48 (18.8%) III (100.0%) (0.0%)
## 3. pine 45 (17.6%) III
## 4. oak_pine 21 ( 8.2%) I
## 5. maple_oak 16 ( 6.3%) I
## 6. pine_oak 14 ( 5.5%) I
## 7. maple 12 ( 4.7%)
## 8. maple_birch 7 ( 2.7%)
## 9. maple_hardwood? 5 ( 2.0%)
## 10. pine_hemlock 5 ( 2.0%)
## [ 9 others ] 17 ( 6.7%) I
##
## 11 stand_area_ha Mean (sd) : 8.4 (3.5) 67 distinct values . : 255 0
## [numeric] min < med < max: : : : (100.0%) (0.0%)
## 3.5 < 8 < 25.9 : : :
## IQR (CV) : 3.2 (0.4) : : : :
## : : : : : .
##
## 12 forest_area_km2 Mean (sd) : 210.2 (436.8) 13 distinct values : 255 0
## [numeric] min < med < max: : (100.0%) (0.0%)
## 1.6 < 14 < 1282 :
## IQR (CV) : 95.7 (2.1) :
## : :
##
## 13 trap_name 1. BC Co-Dom1 1 ( 0.4%) 255 0
## [character] 2. BC Co-Dom2 1 ( 0.4%) (100.0%) (0.0%)
## 3. BC Dom1 1 ( 0.4%)
## 4. BC Dom2 1 ( 0.4%)
## 5. BC Low 1 1 ( 0.4%)
## 6. BC Low 2 1 ( 0.4%)
## 7. Bolt Co-Dom1 1 ( 0.4%)
## 8. Bolt Co-Dom2 1 ( 0.4%)
## 9. Bolt Dom1 1 ( 0.4%)
## 10. Bolt Dom2 1 ( 0.4%)
## [ 245 others ] 245 (96.1%) IIIIIIIIIIIIIIIIIII
##
## 14 longitude Mean (sd) : -73.7 (1) 255 distinct values : 255 0
## [numeric] min < med < max: : (100.0%) (0.0%)
## -75 < -74 < -72 : : : :
## IQR (CV) : 1.2 (0) : : : :
## : : : : : : .
##
## 15 total_moth_count Mean (sd) : 57.6 (51.4) 113 distinct values : 252 3
## [integer] min < med < max: : (98.8%) (1.2%)
## 0 < 47.5 < 378 : .
## IQR (CV) : 55.5 (0.9) : :
## : : :
##
## 16 complete Mean (sd) : 72.8 (60.9) 92 distinct values : 142 113
## [integer] min < med < max: : : (55.7%) (44.3%)
## 3 < 61 < 378 : :
## IQR (CV) : 68.8 (0.8) : : :
## : : : . .
##
## 17 clean_complete Mean (sd) : 62.3 (55.4) 140 distinct values : 200 55
## [numeric] min < med < max: : . (78.4%) (21.6%)
## 0 < 49.8 < 378 : :
## IQR (CV) : 59 (0.9) : :
## : : :
##
## 18 censored Min : 0 0 : 200 (78.4%) IIIIIIIIIIIIIII 255 0
## [integer] Mean : 0.2 1 : 55 (21.6%) IIII (100.0%) (0.0%)
## Max : 1
##
## 19 reason_incomplete 1. (Empty string) 84 (60.4%) IIIIIIIIIIII 139 116
## [character] 2. Missing in field 2 ( 1.4%) (54.5%) (45.5%)
## 3. Muck 44 (31.7%) IIIIII
## 4. Trap damaged· 9 ( 6.5%) I
##
## 20 X All NA's 0 255
## [logical] (0.0%) (100.0%)
## ------------------------------------------------------------------------------------------------------------------------
str(complete_2023_2024)
## 'data.frame': 255 obs. of 20 variables:
## $ patch_name : chr "Mont_Royal" "Mont_Royal" "Mont_Royal" "Mont_Royal" ...
## $ Years : int 2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
## $ Year : int 1 1 1 1 1 1 1 1 1 1 ...
## $ stand_type : chr "Oak" "Oak" "Oak" "Oak" ...
## $ landscape_type : chr "urban" "urban" "urban" "urban" ...
## $ stand_category : chr "E" "A" "C" "C" ...
## $ Percent_Oak : num 0.4 0.8 0.6 0.6 0.6 0.4 0.4 0.4 0.4 0.4 ...
## $ Percent_Pine : num 0 0 0 0 0 0 0 0 0.1 0.1 ...
## $ Percent_Maple : num 0.3 0.1 0.3 0.3 0.3 0.3 0.3 0.3 0 0 ...
## $ Forest_Type : chr "oak_maple" "oak" "oak" "oak" ...
## $ stand_area_ha : num 14 6.4 8.6 8.6 8.6 11.7 11.7 11.7 8.9 8.9 ...
## $ forest_area_km2 : num 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 ...
## $ trap_name : chr "MRMOM" "MROak2.1" "MROak1.1" "MROak1.2" ...
## $ longitude : num -73.6 -73.6 -73.6 -73.6 -73.6 ...
## $ total_moth_count : int 93 15 7 20 6 96 52 NA 60 20 ...
## $ complete : int 93 15 7 20 6 96 52 NA 60 20 ...
## $ clean_complete : num 93 15 7 20 6 96 52 NA 60 20 ...
## $ censored : int 0 0 0 0 0 0 0 1 0 0 ...
## $ reason_incomplete: chr NA NA NA NA ...
## $ X : logi NA NA NA NA NA NA ...
## change 'Year' from int to chr
complete_2023_2024$Year <- as.character(complete_2023_2024$Year)
str(complete_2023_2024)
## 'data.frame': 255 obs. of 20 variables:
## $ patch_name : chr "Mont_Royal" "Mont_Royal" "Mont_Royal" "Mont_Royal" ...
## $ Years : int 2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
## $ Year : chr "1" "1" "1" "1" ...
## $ stand_type : chr "Oak" "Oak" "Oak" "Oak" ...
## $ landscape_type : chr "urban" "urban" "urban" "urban" ...
## $ stand_category : chr "E" "A" "C" "C" ...
## $ Percent_Oak : num 0.4 0.8 0.6 0.6 0.6 0.4 0.4 0.4 0.4 0.4 ...
## $ Percent_Pine : num 0 0 0 0 0 0 0 0 0.1 0.1 ...
## $ Percent_Maple : num 0.3 0.1 0.3 0.3 0.3 0.3 0.3 0.3 0 0 ...
## $ Forest_Type : chr "oak_maple" "oak" "oak" "oak" ...
## $ stand_area_ha : num 14 6.4 8.6 8.6 8.6 11.7 11.7 11.7 8.9 8.9 ...
## $ forest_area_km2 : num 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 4.89 ...
## $ trap_name : chr "MRMOM" "MROak2.1" "MROak1.1" "MROak1.2" ...
## $ longitude : num -73.6 -73.6 -73.6 -73.6 -73.6 ...
## $ total_moth_count : int 93 15 7 20 6 96 52 NA 60 20 ...
## $ complete : int 93 15 7 20 6 96 52 NA 60 20 ...
## $ clean_complete : num 93 15 7 20 6 96 52 NA 60 20 ...
## $ censored : int 0 0 0 0 0 0 0 1 0 0 ...
## $ reason_incomplete: chr NA NA NA NA ...
## $ X : logi NA NA NA NA NA NA ...
# remove space in trap_name
# replace '-' with '_' in trap_name
complete_2023_2024 <- complete_2023_2024 %>%
mutate(trap_name = str_replace(trap_name, " ", ""))
complete_2023_2024 <- complete_2023_2024 %>%
mutate(trap_name = str_replace(trap_name, "-", "_"))
#Remove MOM traps from either "stand type" or "stand category"
stand_type_filtered <- complete_2023_2024 %>%
filter(stand_type != "MOM")
stand_category_filtered <- complete_2023_2024 %>%
filter(stand_category != "MOM")
# looking for mistakes
unique(complete_2023_2024$stand_type)
## [1] "Oak" "Oak/Pine" "Pine" "MOM" "Pine/Oak" "Oak/Other"
## [7] "Other"
unique(complete_2023_2024$patch_name)
## [1] "Mont_Royal" "Mont_Saint_Hilaire"
## [3] "Montebello" "Notre_Dame_de_Bonsecours"
## [5] "Oka" "Orford"
## [7] "Papineauville" "Rigaud"
## [9] "Brownsburg" "Hatley"
## [11] "Kenauk" "Mont_Gauvin/Glen "
## [13] "Mont_Saint_Bruno" "Parc_Michel_Chartrand"
## [15] "Parc_Pointe_aux_Prairies"
unique(complete_2023_2024$Year)
## [1] "1" "0"
unique(complete_2023_2024$landscape_type)
## [1] "urban" "agricultural" "forest "
unique(complete_2023_2024$stand_category)
## [1] "E" "A" "C" "L" "MOM" "D" "H" "I" "B" "G" "K" "M"
## [13] "F" "J" NA
unique(stand_type_filtered$stand_type)
## [1] "Oak" "Oak/Pine" "Pine" "Pine/Oak" "Oak/Other" "Other"
unique(stand_category_filtered$stand_category)
## [1] "E" "A" "C" "L" "D" "H" "I" "B" "G" "K" "M" "F" "J"
# Data Summaries ----------------------------------------------------------
##separate Stand Type column so that we have a Stand ID for each stand in each patch
stand_ID_filtered <- stand_type_filtered %>%
separate(trap_name, into = c("stand_ID", "trap_number"), remove = FALSE, sep = "\\." ) %>%
glimpse()
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 103 rows [1, 144, 145,
## 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161,
## 162, ...].
## Rows: 245
## Columns: 22
## $ patch_name <chr> "Mont_Royal", "Mont_Royal", "Mont_Royal", "Mont_Roya…
## $ Years <int> 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024…
## $ Year <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1…
## $ stand_type <chr> "Oak", "Oak", "Oak", "Oak", "Oak", "Oak", "Oak", "Oa…
## $ landscape_type <chr> "urban", "urban", "urban", "urban", "urban", "urban"…
## $ stand_category <chr> "E", "A", "C", "C", "C", "E", "E", "E", "E", "E", "E…
## $ Percent_Oak <dbl> 0.4, 0.8, 0.6, 0.6, 0.6, 0.4, 0.4, 0.4, 0.4, 0.4, 0.…
## $ Percent_Pine <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.…
## $ Percent_Maple <dbl> 0.3, 0.1, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.0, 0.0, 0.…
## $ Forest_Type <chr> "oak_maple", "oak", "oak", "oak", "oak", "oak_maple"…
## $ stand_area_ha <dbl> 14.0, 6.4, 8.6, 8.6, 8.6, 11.7, 11.7, 11.7, 8.9, 8.9…
## $ forest_area_km2 <dbl> 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89…
## $ trap_name <chr> "MRMOM", "MROak2.1", "MROak1.1", "MROak1.2", "MROak1…
## $ stand_ID <chr> "MRMOM", "MROak2", "MROak1", "MROak1", "MROak1", "MR…
## $ trap_number <chr> NA, "1", "1", "2", "3", "1", "2", "3", "1", "2", "3"…
## $ longitude <dbl> -73.59218, -73.58859, -73.58863, -73.58738, -73.5892…
## $ total_moth_count <int> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ complete <int> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ clean_complete <dbl> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ censored <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ reason_incomplete <chr> NA, NA, NA, NA, NA, NA, NA, "Missing in field", NA, …
## $ X <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
stand_ID_filtered_1 <- stand_category_filtered %>%
separate(trap_name, into = c("stand_ID", "trap_number"), remove = FALSE, sep = "\\." ) %>%
glimpse()
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 44 rows [1, 144, 145,
## 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161,
## 162, ...].
## Rows: 186
## Columns: 22
## $ patch_name <chr> "Mont_Royal", "Mont_Royal", "Mont_Royal", "Mont_Roya…
## $ Years <int> 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2024…
## $ Year <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1…
## $ stand_type <chr> "Oak", "Oak", "Oak", "Oak", "Oak", "Oak", "Oak", "Oa…
## $ landscape_type <chr> "urban", "urban", "urban", "urban", "urban", "urban"…
## $ stand_category <chr> "E", "A", "C", "C", "C", "E", "E", "E", "E", "E", "E…
## $ Percent_Oak <dbl> 0.4, 0.8, 0.6, 0.6, 0.6, 0.4, 0.4, 0.4, 0.4, 0.4, 0.…
## $ Percent_Pine <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.1, 0.…
## $ Percent_Maple <dbl> 0.3, 0.1, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.0, 0.0, 0.…
## $ Forest_Type <chr> "oak_maple", "oak", "oak", "oak", "oak", "oak_maple"…
## $ stand_area_ha <dbl> 14.0, 6.4, 8.6, 8.6, 8.6, 11.7, 11.7, 11.7, 8.9, 8.9…
## $ forest_area_km2 <dbl> 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89, 4.89…
## $ trap_name <chr> "MRMOM", "MROak2.1", "MROak1.1", "MROak1.2", "MROak1…
## $ stand_ID <chr> "MRMOM", "MROak2", "MROak1", "MROak1", "MROak1", "MR…
## $ trap_number <chr> NA, "1", "1", "2", "3", "1", "2", "3", "1", "2", "3"…
## $ longitude <dbl> -73.59218, -73.58859, -73.58863, -73.58738, -73.5892…
## $ total_moth_count <int> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ complete <int> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ clean_complete <dbl> 93, 15, 7, 20, 6, 96, 52, NA, 60, 20, 15, 17, 25, 3,…
## $ censored <int> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1…
## $ reason_incomplete <chr> NA, NA, NA, NA, NA, NA, NA, "Missing in field", NA, …
## $ X <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
total_moths_2023_2024 <- sum(complete_2023_2024$clean_complete, na.rm = TRUE)
print(total_moths_2023_2024)
## [1] 12456
n_distinct(complete_2023_2024$trap_name)
## [1] 255
n_distinct(stand_ID_filtered$stand_ID)
## [1] 155
n_distinct(stand_ID_filtered$patch_name)
## [1] 15
#check to see the distribution of moth count data
hist(complete_2023_2024$clean_complete,
main = " ",
xlab = "Spongy moth count/trap",
ylab = "Frequency",
col = "darkblue",
border = "black")

hist(complete_2023_2024$clean_complete,
main = " ",
xlab = "Spongy moth count/trap",
ylab = "Frequency",
col = "blue",
border = "black",
breaks = 50) # increase this number to make bins smaller

#Calculate the mean and standard deviation of moth counts for each
#level of stand_category to see if there are differences.
# Checking unique combinations
table(stand_category_filtered$stand_category,
stand_category_filtered$patch_name)
##
## Brownsburg Hatley Kenauk Mont_Gauvin/Glen Mont_Royal Mont_Saint_Bruno
## A 0 0 1 0 1 1
## B 0 0 0 0 0 0
## C 2 2 1 2 3 3
## D 0 0 0 0 0 0
## E 0 0 0 0 8 0
## F 0 0 0 0 0 0
## G 0 0 0 0 0 0
## H 0 0 0 0 0 0
## I 0 0 0 0 0 0
## J 0 0 0 0 0 0
## K 0 0 0 0 0 0
## L 2 0 0 0 1 0
## M 0 0 0 0 0 0
##
## Mont_Saint_Hilaire Montebello Notre_Dame_de_Bonsecours Oka Orford
## A 0 0 0 0 3
## B 0 2 0 0 0
## C 8 4 6 0 2
## D 3 0 0 0 0
## E 0 0 0 8 6
## F 2 0 5 0 5
## G 0 2 0 1 0
## H 4 0 1 8 0
## I 2 5 0 18 0
## J 0 0 0 0 4
## K 0 2 0 5 0
## L 1 0 0 2 7
## M 0 2 2 0 2
##
## Papineauville Parc_Michel_Chartrand Rigaud
## A 4 0 0
## B 0 0 0
## C 0 2 0
## D 0 0 7
## E 2 0 0
## F 0 0 5
## G 0 0 0
## H 2 0 3
## I 0 0 4
## J 0 0 3
## K 0 0 3
## L 0 0 2
## M 0 0 0
table(stand_type_filtered$stand_type,
stand_type_filtered$patch_name)
##
## Brownsburg Hatley Kenauk Mont_Gauvin/Glen Mont_Royal
## Oak 2 2 1 2 8
## Oak/Other 2 2 2 2 0
## Oak/Pine 0 0 1 0 4
## Other 0 2 2 2 0
## Pine 2 0 0 0 1
## Pine/Oak 0 0 0 0 0
##
## Mont_Saint_Bruno Mont_Saint_Hilaire Montebello
## Oak 3 8 6
## Oak/Other 5 3 2
## Oak/Pine 0 8 2
## Other 4 4 2
## Pine 0 1 5
## Pine/Oak 0 2 4
##
## Notre_Dame_de_Bonsecours Oka Orford Papineauville
## Oak 6 6 14 6
## Oak/Other 1 2 4 2
## Oak/Pine 5 9 2 2
## Other 2 0 4 2
## Pine 2 7 13 0
## Pine/Oak 1 18 0 0
##
## Parc_Michel_Chartrand Parc_Pointe_aux_Prairies Rigaud
## Oak 2 0 7
## Oak/Other 2 2 2
## Oak/Pine 0 0 8
## Other 2 4 0
## Pine 0 0 8
## Pine/Oak 0 0 4
# Set the levels of 'stand_type' to ensure the correct order
stand_type_filtered$stand_type <- factor(stand_type_filtered$stand_type,
levels = c("Oak", "Oak/Pine", "Oak/Other",
"Pine/Oak", "Pine", "Other"))
# Create the 'stand-type by patch' table
table(stand_type_filtered$stand_type, stand_type_filtered$patch_name)
##
## Brownsburg Hatley Kenauk Mont_Gauvin/Glen Mont_Royal
## Oak 2 2 1 2 8
## Oak/Pine 0 0 1 0 4
## Oak/Other 2 2 2 2 0
## Pine/Oak 0 0 0 0 0
## Pine 2 0 0 0 1
## Other 0 2 2 2 0
##
## Mont_Saint_Bruno Mont_Saint_Hilaire Montebello
## Oak 3 8 6
## Oak/Pine 0 8 2
## Oak/Other 5 3 2
## Pine/Oak 0 2 4
## Pine 0 1 5
## Other 4 4 2
##
## Notre_Dame_de_Bonsecours Oka Orford Papineauville
## Oak 6 6 14 6
## Oak/Pine 5 9 2 2
## Oak/Other 1 2 4 2
## Pine/Oak 1 18 0 0
## Pine 2 7 13 0
## Other 2 0 4 2
##
## Parc_Michel_Chartrand Parc_Pointe_aux_Prairies Rigaud
## Oak 2 0 7
## Oak/Pine 0 0 8
## Oak/Other 2 2 2
## Pine/Oak 0 0 4
## Pine 0 0 8
## Other 2 4 0
# Create the contingency table
contingency_table <- table(stand_type_filtered$stand_type, stand_type_filtered$patch_name)
# Convert the table to a data frame
contingency_df <- as.data.frame(contingency_table)
# Heatmap stands by patch -------------------------------------------------
# Create a plot (heatmap) of the contingency table
contingency_df
## Var1 Var2 Freq
## 1 Oak Brownsburg 2
## 2 Oak/Pine Brownsburg 0
## 3 Oak/Other Brownsburg 2
## 4 Pine/Oak Brownsburg 0
## 5 Pine Brownsburg 2
## 6 Other Brownsburg 0
## 7 Oak Hatley 2
## 8 Oak/Pine Hatley 0
## 9 Oak/Other Hatley 2
## 10 Pine/Oak Hatley 0
## 11 Pine Hatley 0
## 12 Other Hatley 2
## 13 Oak Kenauk 1
## 14 Oak/Pine Kenauk 1
## 15 Oak/Other Kenauk 2
## 16 Pine/Oak Kenauk 0
## 17 Pine Kenauk 0
## 18 Other Kenauk 2
## 19 Oak Mont_Gauvin/Glen 2
## 20 Oak/Pine Mont_Gauvin/Glen 0
## 21 Oak/Other Mont_Gauvin/Glen 2
## 22 Pine/Oak Mont_Gauvin/Glen 0
## 23 Pine Mont_Gauvin/Glen 0
## 24 Other Mont_Gauvin/Glen 2
## 25 Oak Mont_Royal 8
## 26 Oak/Pine Mont_Royal 4
## 27 Oak/Other Mont_Royal 0
## 28 Pine/Oak Mont_Royal 0
## 29 Pine Mont_Royal 1
## 30 Other Mont_Royal 0
## 31 Oak Mont_Saint_Bruno 3
## 32 Oak/Pine Mont_Saint_Bruno 0
## 33 Oak/Other Mont_Saint_Bruno 5
## 34 Pine/Oak Mont_Saint_Bruno 0
## 35 Pine Mont_Saint_Bruno 0
## 36 Other Mont_Saint_Bruno 4
## 37 Oak Mont_Saint_Hilaire 8
## 38 Oak/Pine Mont_Saint_Hilaire 8
## 39 Oak/Other Mont_Saint_Hilaire 3
## 40 Pine/Oak Mont_Saint_Hilaire 2
## 41 Pine Mont_Saint_Hilaire 1
## 42 Other Mont_Saint_Hilaire 4
## 43 Oak Montebello 6
## 44 Oak/Pine Montebello 2
## 45 Oak/Other Montebello 2
## 46 Pine/Oak Montebello 4
## 47 Pine Montebello 5
## 48 Other Montebello 2
## 49 Oak Notre_Dame_de_Bonsecours 6
## 50 Oak/Pine Notre_Dame_de_Bonsecours 5
## 51 Oak/Other Notre_Dame_de_Bonsecours 1
## 52 Pine/Oak Notre_Dame_de_Bonsecours 1
## 53 Pine Notre_Dame_de_Bonsecours 2
## 54 Other Notre_Dame_de_Bonsecours 2
## 55 Oak Oka 6
## 56 Oak/Pine Oka 9
## 57 Oak/Other Oka 2
## 58 Pine/Oak Oka 18
## 59 Pine Oka 7
## 60 Other Oka 0
## 61 Oak Orford 14
## 62 Oak/Pine Orford 2
## 63 Oak/Other Orford 4
## 64 Pine/Oak Orford 0
## 65 Pine Orford 13
## 66 Other Orford 4
## 67 Oak Papineauville 6
## 68 Oak/Pine Papineauville 2
## 69 Oak/Other Papineauville 2
## 70 Pine/Oak Papineauville 0
## 71 Pine Papineauville 0
## 72 Other Papineauville 2
## 73 Oak Parc_Michel_Chartrand 2
## 74 Oak/Pine Parc_Michel_Chartrand 0
## 75 Oak/Other Parc_Michel_Chartrand 2
## 76 Pine/Oak Parc_Michel_Chartrand 0
## 77 Pine Parc_Michel_Chartrand 0
## 78 Other Parc_Michel_Chartrand 2
## 79 Oak Parc_Pointe_aux_Prairies 0
## 80 Oak/Pine Parc_Pointe_aux_Prairies 0
## 81 Oak/Other Parc_Pointe_aux_Prairies 2
## 82 Pine/Oak Parc_Pointe_aux_Prairies 0
## 83 Pine Parc_Pointe_aux_Prairies 0
## 84 Other Parc_Pointe_aux_Prairies 4
## 85 Oak Rigaud 7
## 86 Oak/Pine Rigaud 8
## 87 Oak/Other Rigaud 2
## 88 Pine/Oak Rigaud 4
## 89 Pine Rigaud 8
## 90 Other Rigaud 0
ggplot(contingency_df, aes(x = Var1, y = Var2, fill = Freq)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "darkred") +
labs(x = "Stand Type", y = "Patch Name", fill = "Frequency") +
theme_minimal()

# Save the plot as an image (e.g., PNG)
#ggsave("contingency_table_heatmap.png", width = 7.5, height = 6)
#convert the data in the heatmap plot to a table format
wide_table <- contingency_df %>%
pivot_wider(names_from = Var1, values_from = Freq, values_fill = 0)
wide_table <- wide_table %>%
mutate(
Var2 = trimws(as.character(Var2)),
Var2 = dplyr::recode(Var2,
"Mont_Gauvin/Glen" = "Mont Gauvin",
"Mont_Royal" = "Mont Royal",
"Mont_Saint_Bruno" = "Mont Saint Bruno",
"Mont_Saint_Hilaire" = "Mont Saint Hilaire",
"Notre_Dame_de_Bonsecours" = "Notre Dame de Bonsecours",
"Oka" = "Parc Oka",
"Orford" = "Mont Orford",
"Parc_Michel_Chartrand" = "Parc Michel Chartrand",
"Parc_Pointe_aux_Prairies" = "Parc Pointe aux Prairies",
"Rigaud" = "Mont Rigaud",
))
##print(wide_table)
patch_order <- c("Papineauville", "Montebello", "Notre Dame de Bonsecours",
"Kenauk", "Brownsburg", "Mont Rigaud", "Parc Oka",
"Mont Royal",
"Parc Pointe aux Prairies", "Parc Michel Chartrand",
"Mont Saint Bruno", "Mont Saint Hilaire","Mont Gauvin",
"Mont Orford", "Hatley")
wide_table <- wide_table %>%
mutate(Var2 = factor(Var2, levels = patch_order)) %>%
arrange(Var2)
##print(wide_table)
gt_table <- wide_table %>%
gt() %>%
cols_label(
Var2 = "Patch Name"
) %>%
tab_options(
table.font.size = 12,
heading.title.font.size = 16,
heading.subtitle.font.size = 14
) %>%
cols_align(
align = "center",
columns = everything()
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_column_labels(everything())
) %>%
tab_style( # Custom striping on even-numbered rows
style = cell_fill(color = "#f9f9f0"), # You can change this color
locations = cells_body(rows = seq(2, nrow(wide_table), 2))
)
##gtsave(gt_table, "heatmap_table.png") # Image
##gtsave(gt_table, "heatmap_table.html") # Web preview
version$version.string
## [1] "R version 4.4.3 (2025-02-28)"
# Summary statistics for moth count by stand_category and stand_type
summary_stats <- stand_type_filtered %>%
group_by(stand_category, stand_type, patch_name) %>%
summarise(
mean_count = mean(total_moth_count, na.rm = TRUE),
sd_count = sd(total_moth_count, na.rm = TRUE),
var_count = var(total_moth_count, na.rm = TRUE),
count = n()
)
## `summarise()` has grouped output by 'stand_category', 'stand_type'. You can
## override using the `.groups` argument.
##print(summary_stats, n=22)
# Table to use -----------------------------------------------------------
# Summary statistics for moth count by stand_type, differentiating between
##traps set and traps with usable data
summary_stats_3 <- stand_type_filtered %>%
group_by(stand_type) %>%
summarise(
mean_count = mean(clean_complete, na.rm = TRUE),
sd_count = sd(clean_complete, na.rm = TRUE),
n_traps = n(),
n_obs = sum(!is.na (clean_complete))
)
print(summary_stats_3, n=22)
## # A tibble: 6 × 5
## stand_type mean_count sd_count n_traps n_obs
## <fct> <dbl> <dbl> <int> <int>
## 1 Oak 77.0 69.8 73 57
## 2 Oak/Pine 64.4 52.6 41 30
## 3 Oak/Other 37.3 27.0 33 29
## 4 Pine/Oak 85.8 61.1 29 20
## 5 Pine 54.8 33.2 39 27
## 6 Other 36.9 26.8 30 27
# Round numeric columns to 2 decimal places
summary_stats_3$mean_count <- round(summary_stats_3$mean_count, 2)
summary_stats_3$sd_count <- round(summary_stats_3$sd_count, 2)
summary_stats_3$count <- round(summary_stats_3$n_obs, 2)
# Summary statistics for moth count by patch
summary_stats_4 <- stand_type_filtered %>%
group_by(patch_name) %>%
summarise(
mean_count = mean(clean_complete, na.rm = TRUE),
sd_count = sd(clean_complete, na.rm = TRUE),
count = n()
)
print(summary_stats_4, n=22)
## # A tibble: 15 × 4
## patch_name mean_count sd_count count
## <chr> <dbl> <dbl> <int>
## 1 "Brownsburg" 38.6 9.40 6
## 2 "Hatley" 74.8 18.4 6
## 3 "Kenauk" 26.8 20.4 6
## 4 "Mont_Gauvin/Glen " 34.1 11.6 6
## 5 "Mont_Royal" 35.5 32.0 13
## 6 "Mont_Saint_Bruno" 24.7 13.9 12
## 7 "Mont_Saint_Hilaire" 23.5 29.6 26
## 8 "Montebello" 54 44.8 21
## 9 "Notre_Dame_de_Bonsecours" 54.8 32.9 17
## 10 "Oka" 105. 63.6 42
## 11 "Orford" 88.0 67.9 37
## 12 "Papineauville" 71.0 56.2 12
## 13 "Parc_Michel_Chartrand" 70.2 17.4 6
## 14 "Parc_Pointe_aux_Prairies" 43.3 32.7 6
## 15 "Rigaud" 38.1 26.4 29
## Remove MOM row in 'complete' moth counts
moth_by_stand_summary_stats <- summary_stats_3[-c(1),]
print(moth_by_stand_summary_stats, n=22)
## # A tibble: 5 × 6
## stand_type mean_count sd_count n_traps n_obs count
## <fct> <dbl> <dbl> <int> <int> <dbl>
## 1 Oak/Pine 64.4 52.6 41 30 30
## 2 Oak/Other 37.3 27.0 33 29 29
## 3 Pine/Oak 85.8 61.1 29 20 20
## 4 Pine 54.8 33.2 39 27 27
## 5 Other 36.9 26.8 30 27 27
## Remove MOM row in 'clean_complete' moth counts
moth_by_stand_summary_stats_2 <- summary_stats_4[-c(1),]
print(moth_by_stand_summary_stats_2, n=22)
## # A tibble: 14 × 4
## patch_name mean_count sd_count count
## <chr> <dbl> <dbl> <int>
## 1 "Hatley" 74.8 18.4 6
## 2 "Kenauk" 26.8 20.4 6
## 3 "Mont_Gauvin/Glen " 34.1 11.6 6
## 4 "Mont_Royal" 35.5 32.0 13
## 5 "Mont_Saint_Bruno" 24.7 13.9 12
## 6 "Mont_Saint_Hilaire" 23.5 29.6 26
## 7 "Montebello" 54 44.8 21
## 8 "Notre_Dame_de_Bonsecours" 54.8 32.9 17
## 9 "Oka" 105. 63.6 42
## 10 "Orford" 88.0 67.9 37
## 11 "Papineauville" 71.0 56.2 12
## 12 "Parc_Michel_Chartrand" 70.2 17.4 6
## 13 "Parc_Pointe_aux_Prairies" 43.3 32.7 6
## 14 "Rigaud" 38.1 26.4 29
###NEED TO SAVE AND EXPORT THESE SUMMARY TABLES###
# Visualizations ----------------------------------------------------------
ggplot(stand_ID_filtered, aes(x = Percent_Oak, y = clean_complete)) +
geom_point(alpha = 0.5) +
stat_smooth(method = "glm", method.args = list(family = "poisson"),
se = TRUE, color = "darkgreen") +
labs(
title = "Effect of Percent Oak on Moth Counts",
x = "% Oak Cover",
y = "Moth Count"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 55 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning in dpois(y, mu, log = TRUE): non-integer x = 28.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 39.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 53.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 91.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 71.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 77.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 46.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 25.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 35.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 8.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 42.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 26.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 43.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 86.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 29.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 68.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 92.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 80.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 57.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 81.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 59.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 50.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 54.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 55.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 62.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 34.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 50.750000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 32.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 77.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 49.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 79.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 97.500000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 64.250000
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.250000
## Warning: Removed 55 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Negative Binomial models ------------------------------------------------
#checking compatibility of glmmTMB and lme4 packages
packageVersion("glmmTMB")
## [1] '1.1.13'
packageVersion("lme4")
## [1] '1.1.37'
packageVersion("Matrix")
## [1] '1.7.4'
packageVersion("TMB")
## [1] '1.9.18'
packageVersion("mgcv")
## [1] '1.9.1'
sapply(c("Matrix", "TMB", "lme4", "glmmTMB"), find.package)
## Matrix
## "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Matrix"
## TMB
## "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/TMB"
## lme4
## "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/lme4"
## glmmTMB
## "/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/glmmTMB"
#need to convert to 'factor' for negative binomial
stand_ID_filtered$patch_name <- as.factor(stand_ID_filtered$patch_name)
stand_ID_filtered$stand_ID <- as.factor(stand_ID_filtered$stand_ID)
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:lme4':
##
## lmList
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
##
## Attaching package: 'mgcv'
##
## The following objects are masked from 'package:brms':
##
## s, t2
##NB with Oak
Oak_nb_model <- gam(round(clean_complete) ~ Percent_Oak +
s(patch_name, bs = "re") + # random effect for patch_name
s(stand_ID, bs = "re"), # random effect for stand_ID
family = nb(), # negative binomial
method = "REML",
data = stand_ID_filtered)
summary(Oak_nb_model)
##
## Family: Negative Binomial(2.488)
## Link function: log
##
## Formula:
## round(clean_complete) ~ Percent_Oak + s(patch_name, bs = "re") +
## s(stand_ID, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.5213 0.1636 21.524 < 2e-16 ***
## Percent_Oak 0.6857 0.2653 2.585 0.00975 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(patch_name) 10.46 14 185.75 < 2e-16 ***
## s(stand_ID) 35.46 129 65.87 0.00472 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.456 Deviance explained = 56.1%
## -REML = 942.8 Scale est. = 1 n = 190
plot(Oak_nb_model, pages = 1)

# Fitted values
fitted_vals_oak <- fitted(Oak_nb_model)
# Pearson residuals
pearson_resid_oak <- residuals(Oak_nb_model, type = "pearson")
# Residual degrees of freedom
rdf_oak <- df.residual(Oak_nb_model)
plot(fitted_vals_oak, pearson_resid_oak,
xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_oak <- sum(pearson_resid_oak^2) / rdf_oak
dispersion_oak
## [1] 0.8515649
#dispersion = 0.852, indicating that there is no over (or much under) dispersion
performance::check_model(Oak_nb_model, residual_type = "normal")

##NB with Pine
Pine_nb_model <- gam(round(clean_complete) ~ Percent_Pine +
s(patch_name, bs = "re") + # random effect for patch_name
s(stand_ID, bs = "re"), # random effect for stand_ID
family = nb(), # negative binomial
method = "REML",
data = stand_ID_filtered)
summary(Pine_nb_model)
##
## Family: Negative Binomial(2.518)
## Link function: log
##
## Formula:
## round(clean_complete) ~ Percent_Pine + s(patch_name, bs = "re") +
## s(stand_ID, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.7206 0.1376 27.036 <2e-16 ***
## Percent_Pine 0.2016 0.3034 0.664 0.506
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(patch_name) 10.06 14 161.96 < 2e-16 ***
## s(stand_ID) 41.21 129 82.84 0.00112 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.448 Deviance explained = 57.9%
## -REML = 945.55 Scale est. = 1 n = 190
plot(Pine_nb_model, pages = 1)

# Fitted values
fitted_vals_pine <- fitted(Pine_nb_model)
# Pearson residuals
pearson_resid_pine <- residuals(Pine_nb_model, type = "pearson")
# Residual degrees of freedom
rdf_pine <- df.residual(Pine_nb_model)
plot(fitted_vals_pine, pearson_resid_pine,
xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_pine <- sum(pearson_resid_pine^2) / rdf_pine
dispersion_pine
## [1] 0.8448824
#dispersion = 0.845, indicating that there is no over (or much under) dispersion
performance::check_model(Pine_nb_model, residual_type = "normal")

##NB with both Oak and Pine
Both_nb_model <- gam(round(clean_complete) ~ Percent_Oak + Percent_Pine +
Percent_Oak*Percent_Pine +
s(patch_name, bs = "re") + # random effect for patch_name
s(stand_ID, bs = "re"), # random effect for stand_ID
family = nb(), # negative binomial
method = "REML",
data = stand_ID_filtered)
summary(Both_nb_model)
##
## Family: Negative Binomial(2.663)
## Link function: log
##
## Formula:
## round(clean_complete) ~ Percent_Oak + Percent_Pine + Percent_Oak *
## Percent_Pine + s(patch_name, bs = "re") + s(stand_ID, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.2773 0.1881 17.419 < 2e-16 ***
## Percent_Oak 0.9823 0.3214 3.057 0.00224 **
## Percent_Pine 0.6611 0.3769 1.754 0.07939 .
## Percent_Oak:Percent_Pine 1.7512 1.8331 0.955 0.33941
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(patch_name) 10.21 14 166.49 < 2e-16 ***
## s(stand_ID) 40.02 127 79.11 3.91e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.461 Deviance explained = 60.3%
## -REML = 938.48 Scale est. = 1 n = 190
plot(Both_nb_model, pages = 1)

# Fitted values
fitted_vals_both <- fitted(Both_nb_model)
# Pearson residuals
pearson_resid_both <- residuals(Both_nb_model, type = "pearson")
# Residual degrees of freedom
rdf_both <- df.residual(Both_nb_model)
plot(fitted_vals_both, pearson_resid_both,
xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_both <- sum(pearson_resid_both^2) / rdf_both
dispersion_both
## [1] 0.8628572
#dispersion = 0.852, indicating that there is no over (or much under) dispersion
performance::check_model(Both_nb_model, residual_type = "normal")

#Adding a variable looking at year 1 and 2
Year_nb_model <- gam(round(clean_complete) ~
Year +
s(patch_name, bs = "re") + # random effect for patch_name
s(stand_ID, bs = "re"), # random effect for stand_ID
family = nb(), # negative binomial
method = "REML",
data = stand_ID_filtered)
summary(Year_nb_model)
##
## Family: Negative Binomial(2.594)
## Link function: log
##
## Formula:
## round(clean_complete) ~ Year + s(patch_name, bs = "re") + s(stand_ID,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.4825 0.1475 23.61 < 2e-16 ***
## Year1 0.7215 0.1366 5.28 1.29e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(patch_name) 10.93 14 149.03 <2e-16 ***
## s(stand_ID) 26.25 129 39.73 0.0237 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.479 Deviance explained = 55.5%
## -REML = 934.83 Scale est. = 1 n = 190
plot(Year_nb_model, pages = 1)

# Fitted values
fitted_vals_year <- fitted(Year_nb_model)
# Pearson residuals
pearson_resid_year <- residuals(Year_nb_model, type = "pearson")
# Residual degrees of freedom
rdf_year <- df.residual(Year_nb_model)
plot(fitted_vals_year, pearson_resid_year,
xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_year <- sum(pearson_resid_year^2) / rdf_year
dispersion_year
## [1] 0.819819
#dispersion = 0.820, indicating that there is no over (or much under) dispersion
performance::check_model(Year_nb_model, residual_type = "normal")

# All Variables -----------------------------------------------------------
##Fitting a gam model with all of the possible response variables, to
#explore a correlation between all possible variables and moth counts
all_variable_nb <- gam(round(clean_complete) ~ Percent_Oak + Percent_Pine +
+ landscape_type + longitude +
forest_area_km2 + stand_area_ha +
s(patch_name, bs = "re") + # random effect for patch_name
s(stand_ID, bs = "re"), # random effect for stand_ID
family = nb(), # negative binomial
method = "REML",
data = stand_ID_filtered)
summary(all_variable_nb)
##
## Family: Negative Binomial(2.785)
## Link function: log
##
## Formula:
## round(clean_complete) ~ Percent_Oak + Percent_Pine + +landscape_type +
## longitude + forest_area_km2 + stand_area_ha + s(patch_name,
## bs = "re") + s(stand_ID, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.8753148 14.0950754 0.488 0.6257
## Percent_Oak 0.9746785 0.3210116 3.036 0.0024 **
## Percent_Pine 0.6667394 0.3631498 1.836 0.0664 .
## landscape_typeforest 0.5757522 0.3651147 1.577 0.1148
## landscape_typeurban -0.1991551 0.3834729 -0.519 0.6035
## longitude 0.0472188 0.1917962 0.246 0.8055
## forest_area_km2 -0.0004520 0.0003947 -1.145 0.2522
## stand_area_ha -0.0144073 0.0207391 -0.695 0.4872
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(patch_name) 8.931 14 172.76 < 2e-16 ***
## s(stand_ID) 41.467 124 77.11 3.57e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.471 Deviance explained = 62.9%
## -REML = 949.47 Scale est. = 1 n = 190
plot(all_variable_nb, pages = 1)

# Fitted values
fitted_vals_all <- fitted(all_variable_nb)
# Pearson residuals
pearson_resid_all <- residuals(all_variable_nb, type = "pearson")
# Residual degrees of freedom
rdf_all <- df.residual(all_variable_nb)
plot(fitted_vals_all, pearson_resid_all,
xlab="Fitted values", ylab="Pearson residuals")
abline(h=0, col="red")

# Dispersion ratio
dispersion_all <- sum(pearson_resid_all^2) / rdf_all
dispersion_all
## [1] 0.893226
#dispersion = 0.893, indicating that there is no over (or much under) dispersion
performance::check_model(all_variable_nb, residual_type = "normal")

# Print a clean table to the console or a markdown/HTML-friendly output
# Save directly to a file
tab_model(all_variable_nb,
show.stat = TRUE,
p.style = "numeric",
file = "model_summary.doc") # can be .doc, .html, .htm
|
Â
|
round(clean complete)
|
|
Predictors
|
Incidence Rate Ratios
|
CI
|
Statistic
|
p
|
|
(Intercept)
|
968.08
|
0.00 – 963077404573204.62
|
0.49
|
0.626
|
|
Percent Oak
|
2.65
|
1.41 – 4.97
|
3.04
|
0.002
|
|
Percent Pine
|
1.95
|
0.96 – 3.97
|
1.84
|
0.066
|
|
landscape typeforest
|
1.78
|
0.87 – 3.64
|
1.58
|
0.115
|
|
landscape type [urban]
|
0.82
|
0.39 – 1.74
|
-0.52
|
0.604
|
|
longitude
|
1.05
|
0.72 – 1.53
|
0.25
|
0.806
|
|
forest area km2
|
1.00
|
1.00 – 1.00
|
-1.15
|
0.252
|
|
stand area ha
|
0.99
|
0.95 – 1.03
|
-0.69
|
0.487
|
|
Smooth term (patch name)
|
|
|
172.76
|
<0.001
|
|
Smooth term (stand ID)
|
|
|
77.11
|
<0.001
|
|
Observations
|
190
|
|
R2
|
0.471
|
#checking for collinearity between variables
#VIF = 1 indicates no multicollinearity
collinearity_all <- check_collinearity(all_variable_nb)
print(collinearity_all)
## # Check for Multicollinearity
##
## Low Correlation
##
## Term VIF VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
## Percent_Oak 2.69 [ 2.20, 3.38] 1.64 0.37 [0.30, 0.46]
## Percent_Pine 3.73 [ 3.00, 4.75] 1.93 0.27 [0.21, 0.33]
## stand_area_ha 2.88 [ 2.34, 3.63] 1.70 0.35 [0.28, 0.43]
##
## High Correlation
##
## Term VIF VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
## longitude 16.52 [12.80, 21.42] 4.06 0.06 [0.05, 0.08]
## forest_area_km2 11.25 [ 8.76, 14.55] 3.35 0.09 [0.07, 0.11]
# checking for co-variation between numeric predictors
#in a Pearson correlation matrix
numeric_vars <- stand_ID_filtered[, c("Percent_Oak", "Percent_Pine", "longitude",
"forest_area_km2", "stand_area_ha")]
# Correlation matrix
cor(numeric_vars, use = "complete.obs")
## Percent_Oak Percent_Pine longitude forest_area_km2
## Percent_Oak 1.00000000 -0.52500780 -0.15613141 -0.01062395
## Percent_Pine -0.52500780 1.00000000 -0.11765413 0.05010221
## longitude -0.15613141 -0.11765413 1.00000000 -0.41050141
## forest_area_km2 -0.01062395 0.05010221 -0.41050141 1.00000000
## stand_area_ha -0.03911951 0.09618211 0.03658612 -0.24775945
## stand_area_ha
## Percent_Oak -0.03911951
## Percent_Pine 0.09618211
## longitude 0.03658612
## forest_area_km2 -0.24775945
## stand_area_ha 1.00000000
library(gt)
cor_mat <- cor(numeric_vars, use = "complete.obs")
cor_df <- as.data.frame(round(cor_mat, 3))
cor_df |> gt::gt() |> gt::tab_caption("Correlation Matrix")
Correlation Matrix
| Percent_Oak |
Percent_Pine |
longitude |
forest_area_km2 |
stand_area_ha |
| 1.000 |
-0.525 |
-0.156 |
-0.011 |
-0.039 |
| -0.525 |
1.000 |
-0.118 |
0.050 |
0.096 |
| -0.156 |
-0.118 |
1.000 |
-0.411 |
0.037 |
| -0.011 |
0.050 |
-0.411 |
1.000 |
-0.248 |
| -0.039 |
0.096 |
0.037 |
-0.248 |
1.000 |
ggplot(stand_ID_filtered, aes(x = Percent_Oak, y = clean_complete, color = landscape_type)) +
geom_point(alpha = 0.6) + # Add raw data points with transparency
geom_smooth(method = "lm", se = FALSE) + # Add regression lines by group
theme_minimal() +
labs(
title = " ",
x = "Percent Oak",
y = "Moth Counts",
color = "Landscape Type"
) +
scale_color_viridis_d(option = "D") # You can also try options "A", "B", "C", "E"
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 55 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 55 rows containing missing values or values outside the scale range
## (`geom_point()`).
